suppressPackageStartupMessages({
  library(epiwraps)
  library(rtracklayer)
  library(GenomicRanges)
  library(ggplot2)
  library(ComplexHeatmap)
  library(chromVAR)
  library(motifmatchr)
  library(memes)
  library(MotifDb)
  library(universalmotif)
  library(TFBSTools)
  library(AnnotationHub)
  library(RColorBrewer)
  library(viridis)
  library(viridisLite)
  library(ggrepel)
  library(magick)
  library(ggpubr)
  library(cowplot)
})
## Possible Ensembl SSL connectivity problems detected.
## Please see the 'Connection Troubleshooting' section of the biomaRt vignette
## vignette('accessing_ensembl', package = 'biomaRt')Error in curl::curl_fetch_memory(url, handle = handle) : 
##   Peer certificate cannot be authenticated with given CA certificates: [uswest.ensembl.org] SSL certificate problem: certificate has expired
## See system.file("LICENSE", package="MotifDb") for use restrictions.
theme_set(theme_bw())
set.seed(123)

1 A

1.1 load and prepare Data

load signal files load Veh track for NF,Mono and Full; and give names

bw_ATAC_mono <- list.files(path = "../Dex_Veh/tracks/", pattern ="*Veh.(.*)mono.bw",full.names = TRUE )
bw_ATAC_nf <- list.files(path = "../Dex_Veh/tracks/", pattern ="*Veh.(.*)NF.bw",full.names = TRUE )
bw_ATAC_full <- list.files(path = "../Dex_Veh/tracks/", pattern ="*Veh.(.*)full.bw",full.names = TRUE )


names(bw_ATAC_mono) <- c("ATAC_Mono_1","ATAC_Mono_2")
names(bw_ATAC_nf) <- c("ATAC_NF_1","ATAC_NF_2")
names(bw_ATAC_full) <- c("ATAC_Full_1","ATAC_Full_2")

import 5fC sites and getting unique 5fC and no5fC sites

sites_5fC <- readRDS("../object_resources/5fC/sites_5fC.rds")
sites_no5fC <- readRDS("../object_resources/5fC/sites_no5fC.rds")

load TSS with and without 5fC

TSS_2KB_5fC <- readRDS("../object_resources/5fC/TSS_2KB_5fC.rds")
TSS_2KB_no5fC <- readRDS("../object_resources/5fC/TSS_2KB_no5fC.rds")

generation of normalization factors

bg_nf <- bwNormFactors(c(bw_ATAC_nf,bw_ATAC_mono,bw_ATAC_full), method = "background")
## Warning: bwNormFactors is deprecated, please gradually switch to
## `getNormFactors`.
## Comparing coverage in random regions...

sample no5fC regions to match

sampled_no5fC <- TSS_2KB_no5fC[sample(c(1:length(TSS_2KB_no5fC)),size = length(TSS_2KB_5fC))] 
list_5fC_no5fC <- unlist(GRangesList(TSS_2KB_5fC,sampled_no5fC))

cluster regions 5fC top and no5fC bottom

# cluster regions 5fC top and no5fC bottom
i <- integer(length(list_5fC_no5fC))
i[1:(length(list_5fC_no5fC)/2)] <- "TSS with 5fC"
i[((length(list_5fC_no5fC)/2)+1):length(list_5fC_no5fC)] <- "TSS with no 5fC"
#saveRDS(i,"../object_resources/figure_4_meta/cluster_5fC_no5fC.LIST.rds")
mycolors <- c("TSS with 5fC"=brewer.pal(n=9,"Set1")[1], "TSS with no 5fC"=brewer.pal(n=9,"Set1")[2])
m_5fC_no5fC <- signal2Matrix(c(bw_ATAC_nf,bw_ATAC_mono,bw_ATAC_full),w = 2L, regions = list_5fC_no5fC,type = "center")
## Reading ../Dex_Veh/tracks//Veh4.NF.bw
## Reading ../Dex_Veh/tracks//Veh5.NF.bw
## Reading ../Dex_Veh/tracks//Veh4.mono.bw
## Reading ../Dex_Veh/tracks//Veh5.mono.bw
## Reading ../Dex_Veh/tracks//Veh4.full.bw
## Reading ../Dex_Veh/tracks//Veh5.full.bw
m_5fC_no5fC_bg <- rescaleSignalMatrices(m_5fC_no5fC,bg_nf)

#merging replicates in the signal matrices
sm2 <- list( Nuc.Free =mergeSignalMatrices(m_5fC_no5fC_bg[1:2],aggregation = "mean"), Mononucleosome=mergeSignalMatrices(m_5fC_no5fC_bg[3:4],aggregation = "mean"), Full=mergeSignalMatrices(m_5fC_no5fC_bg[5:6],aggregation = "mean") )
 saveRDS(sm2,"../object_resources/figure_4_meta/matrix_mm.rds")
m_mm <- readRDS("../object_resources/figure_4_meta/matrix_mm.rds")

1.2 generate A

mycolors <- c("TSS with 5fC"=brewer.pal(n=9,"Set1")[1], "TSS with no 5fC"=brewer.pal(n=9,"Set1")[2])
i <- readRDS("../object_resources/figure_1_meta/cluster_5fC_no5fC.LIST.rds")
p1 <- epiwraps::plotEnrichedHeatmaps((m_mm), row_split = i,scale_title = "Backg.\nnorm.\ndensity", axis_name = c("-2kb","TSS","+2kb"), colors = rocket(100), mean_color = mycolors, mean_scale_side = "left", top_annotation = FALSE,use_raster = TRUE)
p1

#saveRDS(p1,"../object_resources/figure_4_meta/p1.rds")
#p1 <- readRDS("../object_resources/figure_4_meta/p1.rds")

2 B

2.1 prepare Data

generate individual matrices for 5fC and no5fC

# with 5fC
m_5fC <- signal2Matrix(c(bw_ATAC_nf,bw_ATAC_mono,bw_ATAC_full),w = 10L, regions = TSS_2KB_5fC)
## Reading ../Dex_Veh/tracks//Veh4.NF.bw
## Reading ../Dex_Veh/tracks//Veh5.NF.bw
## Reading ../Dex_Veh/tracks//Veh4.mono.bw
## Reading ../Dex_Veh/tracks//Veh5.mono.bw
## Reading ../Dex_Veh/tracks//Veh4.full.bw
## Reading ../Dex_Veh/tracks//Veh5.full.bw
m_5fC_bg <- rescaleSignalMatrices(m_5fC,bg_nf)

# without 5fC
m_no5fC <- signal2Matrix(c(bw_ATAC_nf,bw_ATAC_mono,bw_ATAC_full),w = 10L, regions = TSS_2KB_no5fC)
## Reading ../Dex_Veh/tracks//Veh4.NF.bw
## Reading ../Dex_Veh/tracks//Veh5.NF.bw
## Reading ../Dex_Veh/tracks//Veh4.mono.bw
## Reading ../Dex_Veh/tracks//Veh5.mono.bw
## Reading ../Dex_Veh/tracks//Veh4.full.bw
## Reading ../Dex_Veh/tracks//Veh5.full.bw
m_no5fC_bg <- rescaleSignalMatrices(m_no5fC,bg_nf)

aggregate signal

# with the matrices we generate dataframes with mean signal per position
d_5fC <- meltSignals(m_5fC_bg,trim = c(0.02,0.98))
d_no5fC <- meltSignals(m_no5fC_bg,trim = c(0.02,0.98))

# we merge both dataframes
dmerge <- dplyr::bind_rows(list("5fC"=d_5fC, "no5fC"=d_no5fC), .id="regions")
dmerge
#saveRDS(dmerge,"../object_resources/figure_4_meta/dmerge_1.rds")
# we load the merged dataframe 
#dmerge <- readRDS("../object_resources/figure_4_meta/dmerge_1.rds")

2.2 generate figure B

# we plot the aggregate signals by region and fragment size distribution


p2 <- plot_grid(
ggplot(dmerge[dmerge$sample == "ATAC_NF_1"|dmerge$sample == "ATAC_NF_2",], aes(position, mean, colour=regions, group = regions, fill = regions)) +
  geom_vline(xintercept=0, linetype="dashed") +
  labs(x="TSS", y="mean background norm. density")+
  stat_summary(geom="ribbon", alpha=0.3, colour = NA ) + stat_summary(fun=mean, geom="line", size = 0.6)+
  ggtitle("Nuc. Free")+ scale_color_brewer(palette="Set1")+scale_fill_brewer(palette="Set1")+ylim(0,0.1)+theme(text = element_text(size = 10),legend.position = "none"),
ggplot(dmerge[dmerge$sample == "ATAC_Mono_1"|dmerge$sample == "ATAC_Mono_2",], aes(position, mean, colour=regions, group = regions, fill = regions)) +
  geom_vline(xintercept=0, linetype="dashed") +
  stat_summary(geom="ribbon", alpha=0.3, colour = NA ) + stat_summary(fun=mean, geom="line", size = 0.6)+ 
  labs(x="TSS")+
  ggtitle("Mononucleosome")+ scale_color_brewer(palette="Set1")+scale_fill_brewer(palette="Set1")+ylim(0,0.1)+theme(text = element_text(size = 10),axis.title.y = element_blank(),legend.position = "none",axis.text.y = element_blank()),
ggplot(dmerge[dmerge$sample == "ATAC_Full_1"|dmerge$sample == "ATAC_Full_2",], aes(position, mean, colour=regions, group = regions, fill = regions)) +
  geom_vline(xintercept=0, linetype="dashed") +
  stat_summary(geom="ribbon", alpha=0.3, colour = NA ) + stat_summary(fun=mean, geom="line", size = 0.6)+
  xlab("TSS")+
  ggtitle("Full")+ scale_color_brewer(palette="Set1")+scale_fill_brewer(palette="Set1")+ylim(0,0.1)+theme(text = element_text(size = 10),axis.title.y = element_blank(),axis.text.y = element_blank()),
 ncol = 3, nrow = 1,rel_widths = c(1.15,1,1.2)
) 
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
p2

#saveRDS(p2,"../object_resources/figure_4_meta/p2.rds")
#p2 <- readRDS("../object_resources/figure_4_meta/p2.rds")

3 C

3.1 load and prepare data

previous data was generated with methylation data from an unpublished source therefore it is now repeated with published work (rando)

import unique 5fC sites in proximity to TSS

unique_5fC_sites_in_TSS <- readRDS("../object_resources/5fC/unique_5fC_sites_in_TSS.rds")

import genome fasta file (GRCm38)

genome <- Rsamtools::FaFile("../object_resources/Mus_musculus.GRCm38.dna_sm.primary_assembly.fa")
genome
## class: FaFile 
## path: ../object_resources/Mus_musculus.GRCm38.dna_sm.primary_assembly.fa
## index: ../object_resources.../Mus_musculus.GRCm38.dna_sm.primary_assembly.fa.fai
## gzindex: ../object_resourc.../Mus_musculus.GRCm38.dna_sm.primary_assembly.fa.gzi
## isOpen: FALSE 
## yieldSize: NA

loading Rando data methylation levels at promoters and perform liftover (PL)

methylation_mm9 <- readRDS("../object_resources/GSE75323_sperm_prom_methylation.mm9.rds")

methylation_mm9 <- GRanges(methylation_mm9)
seqlevelsStyle(methylation_mm9)
## [1] "UCSC"
mm9_mm10_chain <- import.chain("../object_resources//mm9ToMm10.over.chain")

methylation_total <- liftOver(methylation_mm9,chain = mm9_mm10_chain)
methylation_total <- unlist(methylation_total)
seqlevelsStyle(methylation_total) <- "Ensembl"
saveRDS(methylation_total,"../object_resources/methylation_total_grcm38.GR.rds")

load GRCm38 sperm methylation data and check for promoters with methylation score >0.5

#methylation_total <- readRDS("../object_resources/methylation_rando_grcm38.GR.rds")
methyl_promoters <- methylation_total[methylation_total$meanMeth>0.5]

load motifs and check select non-redundant TFs

core <- read_meme(file = "../object_resources/HOCOMOCOv11_core_MOUSE_mono_meme_format.meme") # needs to be the right path
#full <- read_meme(file = "HOCOMOCOv11_full_MOUSE_mono_meme_format.meme") 


# original function PL
summary_core <- universalmotif::summarise_motifs(core)
core <- setNames(object = core,nm = summary_core$name)
  
motifs <- core
modf <- data.frame(row.names=summary_core$name,
                     TF=gsub("_MOUSE.H11MO...+","",summary_core$name),
                     grade=gsub(".+\\.","",summary_core$name),
                     motif_rank = gsub("[^01]+","",gsub(".*MO.",replacement = "",summary_core$name))) #gsub("[^ro]+", "", str1)
modf <- modf[order(modf$TF,modf$grade,modf$motif_rank),]
modf <- modf[!duplicated(modf$TF),]
modf <- modf[!modf$grade == "D",]
motifs <- setNames(universalmotif::convert_motifs(motifs[row.names(modf)]), modf$TF)
peak_centers <- resize(unique_5fC_sites_in_TSS,fix = "center",width = 200)
peak_seqs <- memes::get_sequence(dropSeqlevels(peak_centers,"MT",pruning.mode = "coarse"), genome)# dropping MT due to its absence in  reference genome file
peak_centers <- reduce(promoters(methyl_promoters,  upstream = 1000, downstream = 100))
peak_seqs_methyl_prom <- memes::get_sequence(dropSeqlevels(peak_centers,"MT",pruning.mode = "coarse"), genome)# dropping MT due to its absence in  reference genome file

ame <- memes::runAme(peak_seqs,control = peak_seqs_methyl_prom , database=convert_motifs(motifs,class = "universalmotif-universalmotif"), meme_path="/common/meme/bin/")
head(ame)
ame$motif <- basename(ame$motif_db)
#saveRDS(ame,"../object_resources/figure_4_meta/motif_enrichment_5fC_5mC_promoter.rds")

load sample

#ame <- readRDS("../object_resources/figure_4_meta/motif_enrichment_5fC_5mC_promoter.rds")

ame <- ame[!ame$fp==0,] #TFs with no false positive are removed since no fold change can be calculated

3.2 generate figure C

# results with no false positive were removed
p3 <- ggplot(ame, aes(log2(((tp/pos))/((fp/neg))), -log10(adj.pvalue))) + 
  geom_point(alpha=0.7, size =2.5) + geom_text_repel(data=head((ame),10), aes(label=motif),max.overlaps = 20, size = 4,nudge_y = 0.5) +
  labs(x="log2FE")+ xlim(c(0,6.5))+theme(text = element_text(size = 12))#+ ggtitle("5fC sites in TSS proximity compared to methylated promoter regions")
p3.2 <- p3 + geom_hline(yintercept=-log10(0.05), linetype="dashed")+ 
  geom_text(aes(5.5,-log10(0.05),label = "adjst.p-value = 0.05", vjust = -1.0),size = 3.0)
p3.2

3.3 combine all figures as fig.4

#plot_grid(grid.grabExpr(ComplexHeatmap::draw(p1)), nrow=1, rel_widths=c(5,3))
pp <- plot_grid(plot_grid(grid.grabExpr(ComplexHeatmap::draw(p1)),p3.2,labels = c("A","C"),rel_widths = c(3,2)),p2,labels = c(NA,"B"),nrow = 2,rel_heights = c(3,2))
pp
## Warning: Removed 1 rows containing missing values (`geom_text()`).

pp
## Warning: Removed 1 rows containing missing values (`geom_text()`).

pdf("figure_4_meta.pdf", width=10, height=7.5)
pp # no figure head
## Warning: Removed 1 rows containing missing values (`geom_text()`).
dev.off()
## png 
##   2
sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/openblas/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/libopenblasp-r0.2.20.so
## 
## locale:
##  [1] LC_CTYPE=C.UTF-8       LC_NUMERIC=C           LC_TIME=C.UTF-8       
##  [4] LC_COLLATE=C.UTF-8     LC_MONETARY=C.UTF-8    LC_MESSAGES=C.UTF-8   
##  [7] LC_PAPER=C.UTF-8       LC_NAME=C              LC_ADDRESS=C          
## [10] LC_TELEPHONE=C         LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C   
## 
## attached base packages:
##  [1] grid      parallel  stats4    stats     graphics  grDevices utils    
##  [8] datasets  methods   base     
## 
## other attached packages:
##  [1] cowplot_1.1.1          ggpubr_0.4.0           magick_2.8.1          
##  [4] ggrepel_0.9.3          viridis_0.6.2          viridisLite_0.4.2     
##  [7] RColorBrewer_1.1-3     AnnotationHub_2.22.1   BiocFileCache_1.14.0  
## [10] dbplyr_2.1.1           TFBSTools_1.28.0       universalmotif_1.8.5  
## [13] MotifDb_1.32.0         Biostrings_2.58.0      XVector_0.30.0        
## [16] memes_0.99.11          motifmatchr_1.12.0     chromVAR_1.12.0       
## [19] ggplot2_3.4.2          rtracklayer_1.50.0     epiwraps_0.99.72      
## [22] EnrichedHeatmap_1.29.2 ComplexHeatmap_2.15.1  GenomicRanges_1.42.0  
## [25] GenomeInfoDb_1.26.7    IRanges_2.24.1         S4Vectors_0.28.1      
## [28] BiocGenerics_0.36.1   
## 
## loaded via a namespace (and not attached):
##   [1] rappdirs_0.3.3                R.methodsS3_1.8.2            
##   [3] tidyr_1.2.0                   bit64_4.0.5                  
##   [5] knitr_1.45                    DelayedArray_0.16.3          
##   [7] R.utils_2.11.0                data.table_1.14.8            
##   [9] rpart_4.1.21                  KEGGREST_1.30.1              
##  [11] RCurl_1.98-1.13               AnnotationFilter_1.14.0      
##  [13] doParallel_1.0.17             generics_0.1.3               
##  [15] GenomicFeatures_1.42.3        RSQLite_2.2.11               
##  [17] bit_4.0.5                     tzdb_0.3.0                   
##  [19] xml2_1.3.5                    httpuv_1.6.9                 
##  [21] SummarizedExperiment_1.20.0   assertthat_0.2.1             
##  [23] DirichletMultinomial_1.32.0   xfun_0.41                    
##  [25] hms_1.1.1                     jquerylib_0.1.4              
##  [27] evaluate_0.23                 promises_1.2.0.1             
##  [29] fansi_1.0.5                   progress_1.2.2               
##  [31] caTools_1.18.2                DBI_1.1.3                    
##  [33] htmlwidgets_1.5.4             purrr_1.0.1                  
##  [35] ellipsis_0.3.2                dplyr_1.1.1                  
##  [37] backports_1.4.1               annotate_1.68.0              
##  [39] biomaRt_2.46.3                MatrixGenerics_1.2.1         
##  [41] vctrs_0.6.1                   Biobase_2.50.0               
##  [43] ensembldb_2.14.1              Cairo_1.6-0                  
##  [45] abind_1.4-5                   cachem_1.0.7                 
##  [47] withr_2.5.2                   Gviz_1.34.1                  
##  [49] BSgenome_1.58.0               vroom_1.5.7                  
##  [51] checkmate_2.3.0               GenomicAlignments_1.26.0     
##  [53] prettyunits_1.2.0             cluster_2.1.4                
##  [55] lazyeval_0.2.2                seqLogo_1.56.0               
##  [57] crayon_1.5.2                  edgeR_3.32.1                 
##  [59] pkgconfig_2.0.3               labeling_0.4.3               
##  [61] pkgload_1.3.2                 ProtGenerics_1.22.0          
##  [63] nnet_7.3-19                   rlang_1.1.2                  
##  [65] lifecycle_1.0.4               miniUI_0.1.1.1               
##  [67] dichromat_2.0-0.1             rprojroot_2.0.4              
##  [69] matrixStats_1.1.0             Matrix_1.6-3                 
##  [71] ggseqlogo_0.1                 carData_3.0-5                
##  [73] base64enc_0.1-3               processx_3.8.2               
##  [75] GlobalOptions_0.1.2           png_0.1-8                    
##  [77] rjson_0.2.21                  bitops_1.0-7                 
##  [79] splitstackshape_1.4.8         cmdfun_1.0.2                 
##  [81] R.oo_1.25.0                   blob_1.2.2                   
##  [83] shape_1.4.6                   stringr_1.5.0                
##  [85] readr_2.1.2                   jpeg_0.1-10                  
##  [87] rstatix_0.7.0                 ggsignif_0.6.4               
##  [89] CNEr_1.26.0                   scales_1.2.1                 
##  [91] memoise_2.0.1                 magrittr_2.0.3               
##  [93] plyr_1.8.9                    zlibbioc_1.36.0              
##  [95] compiler_4.0.3                clue_0.3-65                  
##  [97] Rsamtools_2.6.0               cli_3.6.1                    
##  [99] ps_1.7.5                      pbapply_1.7-2                
## [101] htmlTable_2.4.0               Formula_1.2-5                
## [103] MASS_7.3-60                   tidyselect_1.2.0             
## [105] stringi_1.8.1                 highr_0.10                   
## [107] yaml_2.3.7                    askpass_1.2.0                
## [109] locfit_1.5-9.4                latticeExtra_0.6-29          
## [111] sass_0.4.1                    VariantAnnotation_1.36.0     
## [113] tools_4.0.3                   circlize_0.4.15              
## [115] rstudioapi_0.15.0             TFMPvalue_0.0.8              
## [117] foreach_1.5.2                 foreign_0.8-84               
## [119] gridExtra_2.3                 farver_2.1.1                 
## [121] digest_0.6.33                 BiocManager_1.30.20          
## [123] shiny_1.7.1                   pracma_2.4.4                 
## [125] Rcpp_1.0.11                   car_3.0-12                   
## [127] broom_0.7.12                  BiocVersion_3.12.0           
## [129] later_1.3.1                   httr_1.4.2                   
## [131] AnnotationDbi_1.52.0          biovizBase_1.38.0            
## [133] Rdpack_2.6                    colorspace_2.1-0             
## [135] brio_1.1.3                    XML_3.99-0.15                
## [137] splines_4.0.3                 plotly_4.10.0                
## [139] xtable_1.8-4                  jsonlite_1.8.7               
## [141] poweRlaw_0.70.6               GenomicFiles_1.26.0          
## [143] UpSetR_1.4.0                  testthat_3.1.2               
## [145] R6_2.5.1                      Hmisc_4.6-0                  
## [147] pillar_1.9.0                  htmltools_0.5.7              
## [149] mime_0.12                     glue_1.6.2                   
## [151] fastmap_1.1.1                 DT_0.22                      
## [153] BiocParallel_1.24.1           interactiveDisplayBase_1.28.0
## [155] codetools_0.2-19              utf8_1.2.4                   
## [157] lattice_0.21-8                bslib_0.3.1                  
## [159] tibble_3.2.1                  curl_5.0.0                   
## [161] gtools_3.9.5                  GO.db_3.12.1                 
## [163] openssl_2.0.0                 survival_3.3-1               
## [165] limma_3.46.0                  rmarkdown_2.13               
## [167] desc_1.4.2                    munsell_0.5.0                
## [169] GetoptLong_1.0.5              GenomeInfoDbData_1.2.4       
## [171] iterators_1.0.14              reshape2_1.4.4               
## [173] gtable_0.3.3                  rbibutils_2.2.16